I wanted to visualize the change in state populations between 2010 and 2019, and provide plots detailing the change in median income over this time period in an attempt to provide an economic explanation for the migration of people. This data was downloaded from the US census bureau from both the Decennial and American Community Survey data repositiories.
# Load libraries
library(ggplot2)
library(rgeos)
library(rworldmap)
library(sf)
library(tidyr)
library(dplyr)
library(readr)
library(lubridate)
library(tidycensus)
library(stringr)
library(plotly)
library(leaflet)
library(leafpop)
library(rlang)
library(purrr)
# Get 2010 population data, save state name, fips and population columns
statepop.2010 <- get_decennial(geography = 'state', variables = 'P001001', year = 2010)
## Getting data from the 2010 decennial Census
## Using Census Summary File 1
statepop.2010 <- statepop.2010 %>% select(NAME, fips = GEOID, pop2010 = value)
statepop.2010$fips <- as.integer(statepop.2010$fips)
# Get 2019 (2020 is unavailable?) population data, save fips and pop
statepop.2019 <- get_acs(geography = 'state', variables = 'B01001_001', year = 2019)
## Getting data from the 2015-2019 5-year ACS
statepop.2019 <- statepop.2019 %>% select(fips = GEOID, pop2019 = estimate)
statepop.2019$fips <- as.integer(statepop.2019$fips)
# Merge datasets by fips, calculate population change
statepop.diff <- merge(statepop.2010, statepop.2019, by = 'fips')
statepop.diff$popChange <- statepop.diff$pop2019 - statepop.diff$pop2010
statepop.diff <- statepop.diff %>% select(NAME, popChange)
names(statepop.diff) <- c('state', 'popChange')
# Merge map and population data
popChange.map <- map_data('state') %>%
mutate(region = str_to_title(region)) %>%
left_join(statepop.diff, by = c('region' = 'state'))
# plot map of states with covid data
p_map <- ggplot(data = popChange.map,
mapping = aes(x = long, y = lat,
group = group, # groups points of state borders together
fill = popChange, # adds fill color by pop change
text = paste("2010 to 2019 Population Change:\n", popChange))) +
geom_polygon(color = 'white') +
ggdendro::theme_dendro() +
scale_fill_viridis_c(option = 'magma') +
guides(fill = guide_legend(title = 'State Population change between 2010 and 2019')) +
coord_map("conic", lat0 = 30)
# geographic state center data obtained from:
# https://developers.google.com/public-data/docs/canonical/states_csv
# Get state centroid longitude and latitude for leaflet plots
state.midpoint <- read.csv('C:\\Users\\Will\\Desktop\\STAA 566\\AssignmentGitRepos\\HW3_Paces_Will\\StateCentroids.csv')
names(state.midpoint) <- c('name', 'latitude', 'longitude')
# Download median income data per state between 2010 and 2019
years <- c(2010:2019)
state.income <- map_dfr(years,
~get_acs(geography = 'state', variables = 'B19013_001',
year = .x, geometry = F),
.id = 'year.id') %>%
select(year.id, NAME, median.income = estimate) %>% # remove unnecessary variables
mutate(year = as.integer(year.id) + 2009) %>%
select(-year.id)
## Getting data from the 2006-2010 5-year ACS
## Getting data from the 2007-2011 5-year ACS
## Getting data from the 2008-2012 5-year ACS
## Getting data from the 2009-2013 5-year ACS
## Getting data from the 2010-2014 5-year ACS
## Getting data from the 2011-2015 5-year ACS
## Getting data from the 2012-2016 5-year ACS
## Getting data from the 2013-2017 5-year ACS
## Getting data from the 2014-2018 5-year ACS
## Getting data from the 2015-2019 5-year ACS
#state.income$NAME <- factor(state.income$NAME, levels = state.midpoint$name)
state.income <- state.income[order(state.income$NAME), ]
# Function to generate state median income plots
make.ts.plots <- function(name){
ggplot(dplyr::filter(state.income, NAME == name)) +
geom_line(aes(x = year, y = median.income)) +
scale_x_discrete(years) +
theme_minimal() +
labs(x = 'Year', y = 'State Median Annual Income') +
ggtitle(paste0("Median Annual Income in ", name, "\nBetween 2010 and 2019"))
}
state.inc.plots <- lapply(unique(state.income$NAME), make.ts.plots)
# Prepare state midpoint data
state.avg.inc <- state.income %>%
group_by(NAME) %>%
summarise(median.income = mean(median.income)) %>%
select(name = NAME, median.income)
state.midpoint <- merge(state.midpoint, state.avg.inc, by = 'name')
# Plot state centers with leaflet and add plots with leafpop
leaflet(state.midpoint) %>% addTiles() %>%
addAwesomeMarkers(~longitude, ~latitude,
popup = popupGraph(state.inc.plots))